# -*- coding: utf-8 -*-
import scrapy


class ItcastSpider(scrapy.Spider):
    """Spider that scrapes teacher entries from the itcast.cn teacher page.

    Every yielded item is a plain ``dict`` with the keys ``name``,
    ``position`` and ``profile``. A value is ``None`` when the
    corresponding text node is not found inside a teacher entry.
    """

    # Spider name; used to launch the crawl: ``scrapy crawl itcast``.
    name = 'itcast'
    # Crawl scope; keeps the spider from wandering onto other sites.
    allowed_domains = ['itcast.cn']
    # URL(s) the crawl starts from.
    start_urls = ['http://www.itcast.cn/channel/teacher.shtml']

    def parse(self, response):
        """Extract one item per teacher entry found in ``response``.

        Args:
            response: The downloaded page handed to this callback by Scrapy.

        Yields:
            dict: ``name``, ``position`` and ``profile`` of one teacher,
            each taken from the first matching text node (or ``None``).
        """
        # Output key -> XPath relative to a single teacher <li> node.
        # Order matters: it fixes the key order of every yielded item.
        field_xpaths = (
            ("name", "./div/h3/text()"),
            ("position", "./div/h4/text()"),
            ("profile", "./div/p/text()"),
        )

        # Each <li> under the "tea_con" container is one teacher entry.
        for teacher_node in response.xpath("//div[@class='tea_con']//li"):
            yield {
                key: teacher_node.xpath(path).extract_first()
                for key, path in field_xpaths
            }
